COVID 19 - some simple data visualizations for US data
Some other covid19 visualizations:
https://coronavirus.1point3acres.com/
https://coronavirus.jhu.edu/map.html
# data source https://www.census.gov/data/datasets/time-series/demo/popest/2010s-state-total.html and wikipedia
df_population <- data.frame(
state = c("AK", "AL", "AR", "AS", "AZ", "CA", "CO", "CT", "DC", "DE", "FL",
"GA", "GU", "HI", "IA", "ID", "IL", "IN", "KS", "KY", "LA", "MA",
"MD", "ME", "MI", "MN", "MO", "MP", "MS", "MT", "NC", "ND", "NE",
"NH", "NJ", "NM", "NV", "NY", "OH", "OK", "OR", "PA", "PR", "RI",
"SC", "SD", "TN", "TX", "UT", "VA", "VI", "VT", "WA", "WI", "WV", "WY"),
population = c(731545, 4903185, 3017804, 55465 , 7278717, 39512223, 5758736, 3565287, 705749, 973764, 21477737,
10617423, 165768, 1415872, 3155070, 1787065, 12671821, 6732219, 2913314, 4467673, 4648794, 6892503,
6045680, 1344212, 9986857, 5639632, 6137428, 56882, 2976149, 1068778, 10488084, 762062, 1934408,
1359711, 8882190, 2096829, 3080156, 19453561, 11689100, 3956971, 4217737, 12801989, 3193694, 1059361,
5148714, 884659, 6829174, 28995881, 3205958, 8535519, 106977 , 623989, 7614893, 5822434, 1792147, 578759)
)
# The Atlantic Monthly Group (CC BY-NC 4.0)
# source: https://covidtracking.com/api
df_states <- fread("https://covidtracking.com/api/v1/states/daily.csv") %>%
replace(is.na(.), 0) %>%
inner_join(df_population, by = "state")%>%
mutate(date = as.Date(as.character(date), "%Y%m%d"))
tableau10 <- as.list(ggthemes_data[["tableau"]][["color-palettes"]][["regular"]][[1]][,2])$value
first_day <- as.Date("2020-03-15") # to select a date
today <- as.Date(toString(max(df_states$date)))
kable(head(df_states, n = 3))| date | state | positive | probableCases | negative | pending | totalTestResultsSource | totalTestResults | hospitalizedCurrently | hospitalizedCumulative | inIcuCurrently | inIcuCumulative | onVentilatorCurrently | onVentilatorCumulative | recovered | lastUpdateEt | dateModified | checkTimeEt | death | hospitalized | hospitalizedDischarged | dateChecked | totalTestsViral | positiveTestsViral | negativeTestsViral | positiveCasesViral | deathConfirmed | deathProbable | totalTestEncountersViral | totalTestsPeopleViral | totalTestsAntibody | positiveTestsAntibody | negativeTestsAntibody | totalTestsPeopleAntibody | positiveTestsPeopleAntibody | negativeTestsPeopleAntibody | totalTestsPeopleAntigen | positiveTestsPeopleAntigen | totalTestsAntigen | positiveTestsAntigen | fips | positiveIncrease | negativeIncrease | total | totalTestResultsIncrease | posNeg | dataQualityGrade | deathIncrease | hospitalizedIncrease | hash | commercialScore | negativeRegularScore | negativeScore | positiveScore | score | grade | population |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2021-03-05 | AK | 56886 | 0 | 0 | 0 | totalTestsViral | 1731628 | 33 | 1293 | 0 | 0 | 2 | 0 | 0 | 3/5/2021 03:59 | 2021-03-05T03:59:00Z | 03/04 22:59 | 305 | 1293 | 0 | 2021-03-05T03:59:00Z | 1731628 | 68693 | 1660758 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 141 | 0 | 56886 | 7144 | 56886 | 0 | 2 | 3 | 7c69ab3363700e0bfc193bc3374d28a89a0d1102 | 0 | 0 | 0 | 0 | 0 | 0 | 731545 |
| 2021-03-05 | AL | 498887 | 107518 | 1924758 | 0 | totalTestsPeopleViral | 2316127 | 526 | 45976 | 0 | 2672 | 0 | 1514 | 295690 | 3/5/2021 11:00 | 2021-03-05T11:00:00Z | 03/05 06:00 | 10122 | 45976 | 0 | 2021-03-05T11:00:00Z | 0 | 0 | 0 | 391369 | 7943 | 2179 | 0 | 2316127 | 0 | 0 | 0 | 119005 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 811 | 4223 | 2423645 | 4920 | 2423645 | 0 | 28 | 169 | 5789acffb29d541bdafc68570b05d49d8e08dc97 | 0 | 0 | 0 | 0 | 0 | 0 | 4903185 |
| 2021-03-05 | AR | 324326 | 68961 | 2472738 | 0 | totalTestsViral | 2728103 | 359 | 14903 | 153 | 0 | 86 | 1530 | 314732 | 3/5/2021 00:00 | 2021-03-05T00:00:00Z | 03/04 19:00 | 5283 | 14903 | 0 | 2021-03-05T00:00:00Z | 2728103 | 0 | 2472738 | 255365 | 4291 | 992 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 479119 | 81647 | 0 | 0 | 5 | 570 | 8452 | 2797064 | 8874 | 2797064 | 0 | 10 | 27 | a06fef34948008705e4a705542367a074df3e6f8 | 0 | 0 | 0 | 0 | 0 | 0 | 3017804 |
0.1 Rhode Island (as I live in RI now)
df_states %>% filter(state == "RI") %>%
ggplot() +
geom_label(x = first_day, y = 2000, color = "darkgray", label = "total positive", size = 2, hjust = 0) +
geom_text(mapping = aes(x = date, y = 2100, label = positive), color = "darkgray", size = 2, angle = 90, hjust = 0)+
#geom_label(x = first_day, y = 800, color = "black", label = "death", size = 2, hjust = 0) +
geom_label(x = first_day, y = 2000, color = tableau10[2], label = "positiveIncrease", size = 2, hjust = 0) +
geom_label(x = first_day, y = 1900, color = tableau10[1], label = "hospitalizedCurrently", size = 2, hjust = 0) +
# geom_line(mapping = aes(x = date, y = death), alpha = 0.7, color = "black", size = LINE_SIZE) +
# geom_text(mapping = aes(x = date - 0.5, y = death + 10, label = death), color = "black", size = 1.5) +
# geom_point(mapping = aes(x = date, y = death), color = "black", shape = 10) +
geom_line(mapping = aes(x = date, y = hospitalizedCurrently), alpha = 0.7, color = tableau10[1], size = LINE_SIZE) +
geom_text(mapping = aes(x = date - 0.5, y = hospitalizedCurrently + 20, label = hospitalizedCurrently), color = tableau10[1], size = 1.25) +
geom_point(mapping = aes(x = date, y = hospitalizedCurrently), color = tableau10[1], shape = 15) +
geom_line(mapping = aes(x = date, y = positiveIncrease), alpha = 0.7, color = tableau10[2], size = LINE_SIZE) +
geom_text(mapping = aes(x = date - 0.5, y = positiveIncrease + 20, label = positiveIncrease), color = tableau10[2], size = 1.25)+
geom_point(mapping = aes(x = date, y = positiveIncrease), color = tableau10[2]) +
scale_x_date(limits = c(first_day, today), breaks = seq(first_day, today, by = "week")) +
xlab("Date") + ylab("") + ggtitle("RI")0.2 US - all states
df_states %>% group_by(date) %>%
summarise(positiveIncrease = sum(positiveIncrease), hospitalizedCurrently = sum(hospitalizedCurrently), total = sum(positive)) %>%
ungroup() %>%
ggplot() +
geom_label(x = first_day, y = 270000, color = "darkgray", label = "total positive: ", size = 2, hjust = 0) +
geom_text(mapping = aes(x = date, y = 260000, label = total), color = "darkgray", size = 2, angle = 90, hjust = 0) +
geom_label(x = first_day, y = 250000, color = tableau10[1], label = "hospitalizedCurrently", size = 2, hjust = 0) +
geom_label(x = first_day, y = 240000, color = tableau10[2], label = "positiveIncrease", size = 2, hjust = 0) +
geom_line(mapping = aes(x = date, y = hospitalizedCurrently), alpha = 0.7, color = tableau10[1], size = LINE_SIZE) +
geom_text(mapping = aes(x = date - 0.5, y = hospitalizedCurrently + 5000, label = hospitalizedCurrently), color = tableau10[1], size = 1.25) +
geom_point(mapping = aes(x = date, y = hospitalizedCurrently), color = tableau10[1], shape = 15) +
geom_line(mapping = aes(x = date, y = positiveIncrease), alpha = 0.7, color = tableau10[2], size = LINE_SIZE) +
geom_text(mapping = aes(x = date - 0.5, y = positiveIncrease + 5000, label = positiveIncrease), color = tableau10[2], size = 1.25) +
geom_point(mapping = aes(x = date, y = positiveIncrease), color = tableau10[2]) +
scale_x_date(limits = c(first_day, today), breaks = seq(first_day, today, by = "week")) +
xlab("Date") + ylab("") + ggtitle("US - positiveIncrease & hospitalizedCurrently")0.3 US - daily top-2 contributors
If a state has been a top 2 contributor
as_top <- df_states %>%
filter(date > first_day)%>%
mutate(str_date = as.character(date))%>%
group_by(str_date) %>%
arrange(positiveIncrease, by_group = TRUE)%>%
slice_tail(n = 2) %>%
ungroup() %>%
summarise(unique(state))
as_top <- unlist(as_top)
df_states %>%
filter(state %in% as_top) %>%
ggplot() +
stat_steamgraph(mapping = aes(x = date, y = positiveIncrease, group = state, fill = state)) +
scale_x_date(limits = c(first_day, today), breaks = seq(first_day, today, by = "week")) +
scale_y_continuous(breaks = seq(-60000, 60000, by = 10000), labels = c("60000","50000","400000", "30000", "20000", "10000", "0", "10000", "20000", "30000", "40000", "50000","60000")) +
scale_fill_manual(values = TABEALU20) +
xlab("Date") + ylab("positiveIncrease") + ggtitle("If a state was a top-2 contributor on a day")0.4 US - positiveIncrease by state
num_lag <- 21
find_coef <- function(x, y){
m <- lm(y ~ x)
return(coef(m)[2])
}
df_colors <- df_states %>%
group_by(state)%>%
arrange(date, .by_group = TRUE) %>%
slice_tail(n = num_lag) %>% # last N days
summarise(trend_coef = find_coef(date, positiveIncrease)) %>%
mutate(trend_color = ifelse(trend_coef > 0, "increasing", ifelse(trend_coef < 0, "decreasing", "stable"))) %>%
ungroup()%>%
replace(is.na(.), 0) %>%
select(state, trend_coef, trend_color)
df_states %>%
inner_join(df_colors, by = "state") %>%
ggplot() +
geom_smooth(mapping = aes(x = date, y = positiveIncrease), color = "gray", alpha = 0.3, method = "loess", size = LINE_SIZE) +
geom_line(mapping = aes(x = date, y = positiveIncrease, color = trend_color), alpha = 0.7, size = LINE_SIZE) +
geom_point(mapping = aes(x = date, y = positiveIncrease, color = trend_color), size = 1) +
scale_x_date(limits = c(first_day, today), breaks = seq(first_day, today, by = "month")) +
scale_colour_tableau() +
facet_wrap(state ~ ., ncol = 6, scales = "free") +
xlab("Date") + ylab("") + ggtitle("US - positiveIncrease by state, colored by the trend of last 21 days")df_states %>%
inner_join(df_colors, by = "state") %>%
mutate(positiveIncreasePerMillion = positiveIncrease / population * 1000000)%>%
ggplot() +
geom_smooth(mapping = aes(x = date, y = positiveIncreasePerMillion), color = "gray", alpha = 0.3, method = "loess", size = LINE_SIZE) +
geom_line(mapping = aes(x = date, y = positiveIncreasePerMillion, color = trend_color), alpha = 0.7, size = LINE_SIZE) +
geom_point(mapping = aes(x = date, y = positiveIncreasePerMillion, color = trend_color), size = 1) +
scale_y_continuous(limits = c(0, 1500), breaks = seq(0, 1500, by = 500)) +
scale_x_date(limits = c(first_day, today), breaks = seq(first_day, today, by = "month")) +
scale_colour_tableau() +
facet_wrap(state ~ ., ncol = 6, scales = "free") +
xlab("Date") + ylab("") + ggtitle("US - positiveIncreasePerMillion by state, colored by the trend of last 21 days")0.5 US - hospitalizedCurrently by state
df_states %>%
ggplot() +
geom_smooth(mapping = aes(x = date, y = hospitalizedCurrently), color = "gray", alpha = 0.3, method = "loess", size = LINE_SIZE) +
geom_line(mapping = aes(x = date, y = hospitalizedCurrently), alpha = 0.7, color = tableau10[3], size = LINE_SIZE) +
geom_point(mapping = aes(x = date, y = hospitalizedCurrently), color = tableau10[3], size = 1) +
scale_x_date(limits = c(first_day, today), breaks = seq(first_day, today, by = "month")) +
facet_wrap(state ~ ., ncol = 6, scales = "free") +
xlab("Date") + ylab("") + ggtitle("US - hospitalizedCurrently by state")0.6 US - dailyTestPositiveRate against overallTestedPopulationRate
df_pr <- df_states %>%
mutate(testPositiveRate = positiveIncrease / totalTestResultsIncrease, testedPopulationRate = totalTestResults / population) %>%
filter(testPositiveRate > 0 & testPositiveRate < 1) # rm buggy data to allow log scales
df_pr_colors <- df_pr %>%
group_by(state)%>%
arrange(date, .by_group = TRUE) %>%
slice_tail(n = num_lag) %>% # last N days
summarise(trend_coef = find_coef(date, testPositiveRate)) %>%
mutate(trend_color = ifelse(trend_coef > 0, "increasing", ifelse(trend_coef < 0, "decreasing", "stable"))) %>%
ungroup()%>%
replace(is.na(.), 0) %>%
select(state, trend_coef, trend_color)
df_pr_summary <- df_states %>%
filter(date > as.Date('2020-07-31') & date < as.Date('2020-11-14'))%>%
group_by(date) %>%
summarise(national_positive = sum(positiveIncrease), national_tested = sum(totalTestResultsIncrease))%>%
mutate(testPositiveRate = national_positive / national_tested)%>%
ungroup() %>%
summarise(testPositiveRate_mean = median(testPositiveRate), testPositiveRate_sd = mad(testPositiveRate), per95 = quantile(testPositiveRate, probs = 0.95))
df_pr %>%
inner_join(df_pr_colors, by = "state") %>%
ggplot() +
geom_smooth(mapping = aes(x = testedPopulationRate, y = testPositiveRate), color = "gray", alpha = 0.3, method = "loess", size = LINE_SIZE) +
geom_line(mapping = aes(x = testedPopulationRate, y = testPositiveRate, color = trend_color), alpha = 0.7, size = LINE_SIZE) +
geom_point(mapping = aes(x = testedPopulationRate, y = testPositiveRate, color = trend_color), size = 1) +
scale_x_continuous(limits = c(0, 2.5), breaks = seq(0, 2.5, by = 0.25)) +
scale_y_continuous(limits = c(0.001, 1), trans = 'log10', breaks = c(0.001, 0.01, 0.05, 0.1, 0.2, 0.3, 0.5, 0.75, 1)) +
scale_colour_tableau() +
facet_wrap(state ~ ., ncol = 6, scales = "free") +
xlab("dailyTestPositiveRate") + ylab("overallTestedPopulationRate") + ggtitle("US - dailyTestPositiveRate against overallTestedPopulationRate")0.7 US - death per 10k by state
df_states %>%
mutate(deathPer10K = death / population * 10000) %>%
ggplot() +
geom_line(mapping = aes(x = date, y = deathPer10K), alpha = 0.7, color = tableau10[3], size = LINE_SIZE) +
geom_point(mapping = aes(x = date, y = deathPer10K), color = tableau10[3], size = 1) +
scale_x_date(limits = c(first_day, today), breaks = seq(first_day, today, by = "month")) +
scale_y_continuous(limits = c(0, 50), breaks = seq(0, 50, by = 10)) +
facet_wrap(state ~ ., ncol = 6, scales = "free") +
xlab("date") + ylab("death per 10k") + ggtitle("US - death per 10k by state")0.8 US - positive per 1k by state
df_states %>%
mutate(positivePerOneK = positive / population * 1000) %>%
ggplot() +
geom_line(mapping = aes(x = date, y = positivePerOneK), alpha = 0.7, color = tableau10[4], size = LINE_SIZE) +
geom_point(mapping = aes(x = date, y = positivePerOneK), color = tableau10[4], size = 1) +
scale_y_continuous(limits = c(0, 150), breaks = seq(0, 150, by = 30)) +
scale_x_date(limits = c(first_day, today), breaks = seq(first_day, today, by = "month")) +
facet_wrap(state ~ ., ncol = 6, scales = "free") +
xlab("date") + ylab("") + ggtitle("US - positivePerOneK by state")0.9 US - tested amount by state
df_states %>%
mutate(testResultsIncrease = positiveIncrease + negativeIncrease) %>%
ggplot() +
geom_smooth(mapping = aes(x = date, y = testResultsIncrease), color = "gray", alpha = 0.3, method = "loess", size = LINE_SIZE) +
geom_line(mapping = aes(x = date, y = testResultsIncrease), alpha = 0.7, color = tableau10[7], size = LINE_SIZE) +
geom_point(mapping = aes(x = date, y = testResultsIncrease), color = tableau10[7], size = 1) +
scale_x_date(limits = c(first_day, today), breaks = seq(first_day, today, by = "month")) +
facet_wrap(state ~ ., ncol = 6, scales = "free") +
xlab("date") + ylab("testResultsIncrease") + ggtitle("US - testResultsIncrease by state")